1 //==============================================================================
2 // file : XMLHelper.java
3 // project: Java Common Utility
4 //
5 // last change: date: $Date: 2003/09/10 09:22:14 $
6 // by: $Author: bitiboy $
7 // revision: $Revision: 1.1 $
8 //------------------------------------------------------------------------------
9 // copyright: GNU GPL Software License (see class documentation)
10 //==============================================================================
11 package com.justhis.xml;
12
13
14 /*
15 *$Id: XMLHelper.java,v 1.1 2003/09/10 09:22:14 bitiboy Exp $
16 *
17 * Copyright 2003 Acai Software All Rights Reserved.
18 *
19 * This file XMLHelper.java is part of the Java Common Utility
20
21 * The Java Common Utility is free software; you can redistribute it and/or modify
22 * it under the terms of the GNU General Public License as published by
23 * the Free Software Foundation; either version 2 of the License, or
24 * (at your option) any later version.
25
26 * Java Common Utility is distributed in the hope that it will be useful,
27 * but WITHOUT ANY WARRANTY; without even the implied warranty of
28 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29 * GNU General Public License for more details.
30
31 * You should have received a copy of the GNU General Public License
32 * along with the Java Common Utility; if not, write to the Free Software
33 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
34
35 * http://www.justhis.com
36 * CONTACT: email = superaxis@sohu.com webmaster@justhis.com
37 */
38 import com.justhis.util.XParameters;
39 import com.justhis.util.exception.XMLException;
40
41 import org.apache.xerces.dom.DocumentImpl;
42 import org.apache.xerces.parsers.DOMParser;
43
44 import org.apache.xml.serialize.OutputFormat;
45 import org.apache.xml.serialize.XMLSerializer;
46
47 import org.w3c.dom.Document;
48 import org.w3c.dom.Element;
49 import org.w3c.dom.NodeList;
50
51 import org.xml.sax.InputSource;
52 import org.xml.sax.SAXException;
53
54 import java.io.File;
55 import java.io.FileOutputStream;
56 import java.io.FileWriter;
57 import java.io.IOException;
58 import java.io.PrintStream;
59 import java.io.StringReader;
60 import java.io.StringWriter;
61
62 import java.net.MalformedURLException;
63 import java.net.URL;
64 import java.net.URLConnection;
65
66 import java.util.Date;
67 import java.util.Enumeration;
68
69 import javax.xml.parsers.DocumentBuilder;
70 import javax.xml.parsers.DocumentBuilderFactory;
71 import javax.xml.parsers.ParserConfigurationException;
72 import javax.xml.transform.Transformer;
73 import javax.xml.transform.TransformerConfigurationException;
74 import javax.xml.transform.TransformerException;
75 import javax.xml.transform.TransformerFactory;
76 import javax.xml.transform.dom.DOMResult;
77 import javax.xml.transform.dom.DOMSource;
78 import javax.xml.transform.stream.StreamSource;
79
80
81 /***
82 * TODO DOCUMENT ME!
83 *
84 * @author <a href="http://blog.ejb.cn">acai</a>
85 * @version $Revision: 1.1 $
86 */
87 public class XMLHelper {
88 //~ Methods ----------------------------------------------------------------
89
90 /***
91 * TODO DOCUMENT ME!
92 *
93 * @param doc TODO
94 *
95 * @return TODO
96 *
97 * @throws XMLException TODO
98 */
99 public static String convertXMLToString(Document doc)
100 throws XMLException {
101 try {
102 OutputFormat of = new OutputFormat(doc);
103 of.setIndenting(true);
104
105 StringWriter sw = new StringWriter();
106 XMLSerializer serializer = new XMLSerializer(sw, of);
107 serializer.serialize(doc);
108
109 return sw.toString();
110 } catch (IOException ioe) {
111 throw new XMLException("Unable to write to the string", ioe);
112 }
113 }
114
115 /***
116 * TODO DOCUMENT ME!
117 *
118 * @return TODO
119 */
120 public static Document createXml() {
121 Document doc = new DocumentImpl();
122
123 return doc;
124 }
125
126 /***
127 * TODO DOCUMENT ME!
128 *
129 * @param root TODO
130 *
131 * @return TODO
132 */
133 public static Document createXml(String root) {
134 Document doc = new DocumentImpl();
135 doc.appendChild(doc.createElement(root));
136
137 return doc;
138 }
139
140 /*
141 public static Document tidyHTML(String url) throws XMLException {
142 return tidyHTML(convertStringToURL(url));
143 }
144
145 public static Document tidyHTML(URL url) throws XMLException {
146 try {
147 URLConnection inConnection = url.openConnection();
148 if (inConnection.getContentType().startsWith("text/xml") ||
149 inConnection.getContentType().startsWith("text/xhtml")) {
150 // All ready an XML source
151 return parseXMLFromURL(url);
152 } else if (inConnection.getContentType().startsWith("text/html")) {
153 // An HTML source
154 InputStream is = inConnection.getInputStream();
155
156 // Clean the input stream
157 ByteArrayOutputStream out = new ByteArrayOutputStream();
158
159 int totalBytes = 0;
160 byte[] buffer = new byte[16384];
161
162 while (true) {
163 int bytesRead = is.read(buffer, 0, buffer.length);
164 if (bytesRead < 0) break;
165 // Remove binary bellow space except tab and newline
166 for (int i=0; i < bytesRead; i++) {
167 byte b = buffer[i];
168 if (b < 32 && b!= 10 && b != 13 && b != 9) b = 32;
169 buffer[i] = b;
170 }
171 out.write(buffer, 0, bytesRead);
172 totalBytes += bytesRead;
173 }
174 is.close();
175 out.close();
176
177 String outContent = out.toString();
178 InputStream in = new ByteArrayInputStream(out.toByteArray());
179
180 org.w3c.tidy.TagTable tags = org.w3c.tidy.TagTable.getDefaultTagTable();
181 tags.defineBlockTag("script");
182 tags.defineBlockTag("nowrap");
183
184 Tidy tidy = new Tidy();
185
186 //tidy.setMakeClean(true);
187 tidy.setShowWarnings(false);
188 tidy.setXmlOut(true);
189 tidy.setXmlPi(false);
190 tidy.setDocType("omit");
191 //tidy.setQuoteNbsp(true);
192 //tidy.setQuoteAmpersand(true);
193 tidy.setXHTML(false);
194 tidy.setRawOut(true);
195 tidy.setNumEntities(true);
196 tidy.setQuiet(true);
197 tidy.setFixComments(true);
198 tidy.setIndentContent(true);
199 tidy.setCharEncoding(org.w3c.tidy.Configuration.ASCII);
200
201 ByteArrayOutputStream baos = new ByteArrayOutputStream();
202 //Document resultDoc = tidy.parseDOM(in, null);
203 //if (result == null) System.err.println("Null sucker");
204 //tidy.pprint(resultDoc, baos);
205
206 org.w3c.tidy.Node tNode = tidy.parse(in, baos);
207 String result = "<?xml version=\"1.0\" encoding=\"ISO-8859-1\" ?>\n" +
208 baos.toString();
209 // Strip the DOCTYPE and script elements
210 int startIndex = 0;
211 int endIndex = 0;
212 if ((startIndex = result.indexOf("<!DOCTYPE")) >= 0) {
213 endIndex = result.indexOf(">",startIndex);
214 result = result.substring(0,startIndex) +
215 result.substring(endIndex + 1, result.length());
216 }
217 while ((startIndex = result.indexOf("<script")) >= 0) {
218 endIndex = result.indexOf("</script>");
219 result = result.substring(0,startIndex) +
220 result.substring(endIndex + 9, result.length());
221 }
222
223 in.close();
224 baos.close();
225
226 return parseXMLFromString(result);
227
228 } else {
229 throw new XMLException("Unable to tidy content type: " +
230 inConnection.getContentType());
231 }
232 } catch (IOException ioe) {
233 throw new XMLException("Unable to perform input/output", ioe);
234 }
235 }
236 */
237 public static void main(String[] args) {
238 if (args.length < 3) {
239 printUsage();
240 System.exit(0);
241 }
242
243 File xml_in = new File(args[0]);
244 File xsl_in = new File(args[1]);
245 File xml_out = new File(args[2]);
246
247 Date[] timestamps = new Date[5];
248
249 String encoding = null;
250
251 if ((args.length > 3) && !args[3].equals("report")) {
252 encoding = args[3];
253 }
254
255 try {
256 timestamps[0] = new Date();
257
258 Document xml = parseXMLFromFile(xml_in);
259
260 if (encoding != null) {
261 xml.getDocumentElement().setAttribute("locale", encoding);
262 }
263
264 timestamps[1] = new Date();
265
266 Document xsl = parseXMLFromFile(xsl_in);
267 timestamps[2] = new Date();
268
269 Document result = transformXML(xml, xsl);
270 timestamps[3] = new Date();
271 outputXMLToFile(result, xml_out.getAbsolutePath());
272 timestamps[4] = new Date();
273 } catch (Exception ex) {
274 System.err.println("An Error was encountered: " + ex.getMessage());
275 System.exit(0);
276 }
277
278 if (((args.length > 3) && args[3].equals("report"))
279 || ((args.length > 4) && args[4].equals("report"))
280 ) {
281 System.out.println("Time to parse XML input: "
282 + getTimeDiff(timestamps[0], timestamps[1])
283 );
284 System.out.println("Time to parse XSL input: "
285 + getTimeDiff(timestamps[1], timestamps[2])
286 );
287 System.out.println("Time to perform transformation: "
288 + getTimeDiff(timestamps[2], timestamps[3])
289 );
290 System.out.println("Time to output XML/HTML: "
291 + getTimeDiff(timestamps[3], timestamps[4])
292 );
293 System.out.println("Total elapsed time: "
294 + getTimeDiff(timestamps[0], timestamps[4])
295 );
296 }
297 }
298
299 /***
300 * Copies the children of the mergeFromXML element and places them as
301 * children in the mergeToXML. Of course it recursively gets the children
302 * as well. If childrenOnly is set to false, just insert the mergeFromXML
303 * under the mergeToXML.
304 *
305 * @param mergeToXML TODO
306 * @param mergeFromXML TODO
307 * @param childrenOnly TODO
308 */
309 public static void mergeXML(Element mergeToXML, Element mergeFromXML,
310 boolean childrenOnly
311 ) {
312 Document toDoc = mergeToXML.getOwnerDocument();
313 Element copyElem = (Element) (toDoc.importNode(mergeFromXML, true));
314
315 if (childrenOnly) {
316 NodeList nlist = copyElem.getChildNodes();
317
318 for (int i = 0; i < nlist.getLength(); i++) {
319 org.w3c.dom.Node n = nlist.item(i);
320 mergeToXML.appendChild(n);
321 }
322
323 return;
324 } else {
325 mergeToXML.appendChild(copyElem);
326 }
327 }
328
329 /***
330 * TODO DOCUMENT ME!
331 *
332 * @param text TODO
333 * @param fileName TODO
334 *
335 * @throws XMLException TODO
336 */
337 public static void outputTextToFile(String text, String fileName)
338 throws XMLException {
339 try {
340 File f = new File(fileName);
341 File dir = new File(f.getParent());
342 dir.mkdirs();
343
344 FileWriter fw = new FileWriter(f);
345 fw.write(text);
346 fw.flush();
347 fw.close();
348 } catch (IOException ioe) {
349 throw new XMLException("Unable to write to the given file", ioe);
350 }
351 }
352
353 /***
354 * TODO DOCUMENT ME!
355 *
356 * @param doc TODO
357 * @param stream TODO
358 *
359 * @throws XMLException TODO
360 */
361 public static void outputXML(Document doc, PrintStream stream)
362 throws XMLException {
363 try {
364 OutputFormat of = new OutputFormat(doc);
365 of.setIndenting(true);
366
367 XMLSerializer serializer = new XMLSerializer(stream, of);
368 serializer.serialize(doc);
369 } catch (IOException ioe) {
370 throw new XMLException("Unable to write to the given print stream",
371 ioe
372 );
373 }
374 }
375
376 /***
377 * TODO DOCUMENT ME!
378 *
379 * @param doc TODO
380 * @param fileName TODO
381 *
382 * @throws XMLException TODO
383 */
384 public static void outputXMLToFile(Document doc, String fileName)
385 throws XMLException {
386 try {
387 if (doc == null) {
388 throw new IOException("Output XML document was null");
389 }
390
391 OutputFormat of = new OutputFormat(doc, "UTF-8", true);
392 File f = new File(fileName);
393 FileOutputStream fos = new FileOutputStream(f);
394 XMLSerializer serializer = new XMLSerializer(fos, of);
395 serializer.serialize(doc);
396 fos.close();
397 } catch (IOException ioe) {
398 throw new XMLException("Unable to write to the given file", ioe);
399 }
400 }
401
402 /***
403 * TODO DOCUMENT ME!
404 *
405 * @param f TODO
406 *
407 * @return TODO
408 *
409 * @throws XMLException TODO
410 */
411 public static Document parseXMLFromFile(File f) throws XMLException {
412 try {
413 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
414 factory.setNamespaceAware(true);
415
416 DocumentBuilder builder = factory.newDocumentBuilder();
417
418 return builder.parse(f);
419 } catch (IOException ex) {
420 throw new XMLException("Unable to read from source string", ex);
421 } catch (ParserConfigurationException ex) {
422 throw new XMLException("Unable to configure the parser", ex);
423 } catch (SAXException ex) {
424 throw new XMLException("Unable to parse the input", ex);
425 }
426 }
427
428 /***
429 * TODO DOCUMENT ME!
430 *
431 * @param source TODO
432 *
433 * @return TODO
434 *
435 * @throws XMLException TODO
436 */
437 public static Document parseXMLFromString(String source)
438 throws XMLException {
439 InputSource is = new InputSource(new StringReader(source));
440
441 return parseXMLFromInputSource(is);
442 }
443
444 /***
445 * TODO DOCUMENT ME!
446 *
447 * @param url TODO
448 *
449 * @return TODO
450 *
451 * @throws XMLException TODO
452 */
453 public static Document parseXMLFromURL(URL url) throws XMLException {
454 try {
455 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
456 factory.setNamespaceAware(true);
457
458 DocumentBuilder builder = factory.newDocumentBuilder();
459 URLConnection inConnection = url.openConnection();
460
461 return builder.parse(inConnection.getInputStream());
462 } catch (IOException ex) {
463 throw new XMLException("Unable to read from source string", ex);
464 } catch (ParserConfigurationException ex) {
465 throw new XMLException("Unable to configure the parser", ex);
466 } catch (SAXException ex) {
467 throw new XMLException("Unable to parse the input", ex);
468 }
469 }
470
471 /***
472 * TODO DOCUMENT ME!
473 *
474 * @param url TODO
475 *
476 * @return TODO
477 *
478 * @throws XMLException TODO
479 */
480 public static Document parseXMLFromURLString(String url)
481 throws XMLException {
482 return parseXMLFromURL(convertStringToURL(url));
483 }
484
485 /***
486 * TODO DOCUMENT ME!
487 *
488 * @param xmlDoc TODO
489 * @param xslDoc TODO
490 *
491 * @return TODO
492 *
493 * @throws XMLException TODO
494 */
495 public static Document transformXML(Document xmlDoc, Document xslDoc)
496 throws XMLException {
497 try {
498 TransformerFactory factory = TransformerFactory.newInstance();
499 Transformer transformer = factory.newTransformer(new DOMSource(xslDoc,
500 "./"
501 )
502 );
503 DOMResult result = new DOMResult();
504 transformer.transform(new DOMSource(xmlDoc), result);
505
506 org.w3c.dom.Node resultNode = result.getNode();
507
508 if (resultNode instanceof Document) {
509 return (Document) resultNode;
510 } else {
511 return result.getNode().getOwnerDocument();
512 }
513 } catch (TransformerConfigurationException ex) {
514 ex.printStackTrace();
515 throw new XMLException("Unable to perform transform "
516 + ex.getLocationAsString(), ex
517 );
518 } catch (TransformerException ex) {
519 throw new XMLException("Unable to perform transform "
520 + ex.getLocationAsString(), ex
521 );
522 }
523 }
524
525 /***
526 * TODO DOCUMENT ME!
527 *
528 * @param xmlDoc TODO
529 * @param xslDoc TODO
530 * @param params TODO
531 *
532 * @return TODO
533 *
534 * @throws XMLException TODO
535 */
536 public static Document transformXML(Document xmlDoc, Document xslDoc,
537 XParameters params
538 ) throws XMLException {
539 try {
540 TransformerFactory factory = TransformerFactory.newInstance();
541 Transformer transformer = factory.newTransformer(new DOMSource(xslDoc));
542
543 for (Enumeration e = params.keys(); e.hasMoreElements();) {
544 String key = e.nextElement().toString();
545 transformer.setParameter(key, params.get(key));
546 }
547
548 DOMResult result = new DOMResult();
549 transformer.transform(new DOMSource(xmlDoc), result);
550
551 org.w3c.dom.Node resultNode = result.getNode();
552
553 if (resultNode instanceof Document) {
554 return (Document) resultNode;
555 } else {
556 return result.getNode().getOwnerDocument();
557 }
558 } catch (TransformerConfigurationException ex) {
559 ex.printStackTrace();
560 throw new XMLException("Unable to perform transform "
561 + ex.getLocationAsString(), ex
562 );
563 } catch (TransformerException ex) {
564 throw new XMLException("Unable to perform transform "
565 + ex.getLocationAsString(), ex
566 );
567 }
568 }
569
570 /***
571 * TODO DOCUMENT ME!
572 *
573 * @param xmlDoc TODO
574 * @param xslFile TODO
575 *
576 * @return TODO
577 *
578 * @throws XMLException TODO
579 */
580 public static Document transformXML(Document xmlDoc, File xslFile)
581 throws XMLException {
582 try {
583 TransformerFactory factory = TransformerFactory.newInstance();
584 Transformer transformer = factory.newTransformer(new StreamSource(xslFile));
585
586 DOMResult result = new DOMResult();
587 transformer.transform(new DOMSource(xmlDoc), result);
588
589 org.w3c.dom.Node resultNode = result.getNode();
590
591 if (resultNode instanceof Document) {
592 return (Document) resultNode;
593 } else {
594 return result.getNode().getOwnerDocument();
595 }
596 } catch (TransformerConfigurationException ex) {
597 ex.printStackTrace();
598 throw new XMLException("Unable to perform transform "
599 + ex.getLocationAsString(), ex
600 );
601 } catch (TransformerException ex) {
602 throw new XMLException("Unable to perform transform "
603 + ex.getLocationAsString(), ex
604 );
605 }
606 }
607
608 /***
609 * TODO DOCUMENT ME!
610 *
611 * @param xmlDoc TODO
612 * @param xslFile TODO
613 * @param params TODO
614 *
615 * @return TODO
616 *
617 * @throws XMLException TODO
618 */
619 public static Document transformXML(Document xmlDoc, File xslFile,
620 XParameters params
621 ) throws XMLException {
622 try {
623 TransformerFactory factory = TransformerFactory.newInstance();
624 Transformer transformer = factory.newTransformer(new StreamSource(xslFile));
625
626 for (Enumeration e = params.keys(); e.hasMoreElements();) {
627 String key = e.nextElement().toString();
628 transformer.setParameter(key, params.get(key));
629 }
630
631 DOMResult result = new DOMResult();
632 transformer.transform(new DOMSource(xmlDoc), result);
633
634 org.w3c.dom.Node resultNode = result.getNode();
635
636 if (resultNode instanceof Document) {
637 return (Document) resultNode;
638 } else {
639 return result.getNode().getOwnerDocument();
640 }
641 } catch (TransformerConfigurationException ex) {
642 ex.printStackTrace();
643 throw new XMLException("Unable to perform transform "
644 + ex.getLocationAsString(), ex
645 );
646 } catch (TransformerException ex) {
647 throw new XMLException("Unable to perform transform "
648 + ex.getLocationAsString(), ex
649 );
650 }
651 }
652
653 /***
654 * TODO DOCUMENT ME!
655 *
656 * @param date1 TODO
657 * @param date2 TODO
658 *
659 * @return TODO
660 */
661 private static String getTimeDiff(Date date1, Date date2) {
662 long ts1 = date1.getTime();
663 long ts2 = date2.getTime();
664 long diff = ts2 - ts1;
665 double d = (double) diff / 1000.0;
666
667 return String.valueOf(d) + " seconds";
668 }
669
670 /***
671 * TODO DOCUMENT ME!
672 *
673 * @param url TODO
674 *
675 * @return TODO
676 *
677 * @throws XMLException TODO
678 */
679 private static URL convertStringToURL(String url) throws XMLException {
680 try {
681 return new URL(url);
682 } catch (MalformedURLException murle) {
683 throw new XMLException(url + " is not a well formed URL", murle);
684 }
685 }
686
687 /***
688 * TODO DOCUMENT ME!
689 *
690 * @param is TODO
691 *
692 * @return TODO
693 *
694 * @throws XMLException TODO
695 */
696 private static Document parseXMLFromInputSource(InputSource is)
697 throws XMLException {
698 Document doc = null;
699
700 try {
701 DOMParser parser = new DOMParser();
702 parser.parse(is);
703 doc = parser.getDocument();
704 } catch (IOException ioe) {
705 throw new XMLException("Unable to read from source string", ioe);
706 } catch (SAXException saxe) {
707 throw new XMLException("Unable to parse the given string", saxe);
708 }
709
710 return doc;
711 }
712
713 /***
714 * TODO DOCUMENT ME!
715 */
716 private static void printUsage() {
717 System.out.println("XMLHelper Usage:");
718 System.out.println("\t~> java XMLHelper xml_input_file xsl_input_file output_file [i18n encoding] ['report']");
719 System.out.println("\tEx: ~> java XMLHelper XML/user_interests.xml XSL/user_interests_xsl_only.xsl result.html report");
720 }
721 }
722
723
724 /*
725 * $Log: XMLHelper.java,v $
726 * Revision 1.1 2003/09/10 09:22:14 bitiboy
727 * *** empty log message ***
728 *
729 *
730 */
This page was automatically generated by Maven